# packages
library("rio")     # data import/export

# preparation of conjoint data

# load raw data
# read the raw survey file
dat_lodger <- rio::import(file = "lodger-data-raw.sav")
# stop early unless the file has exactly 1669 rows and 170 columns
stopifnot(nrow(dat_lodger) == 1669, ncol(dat_lodger) == 170)
# rename column W8 to Weight (no-op if W8 is absent)
names(dat_lodger) <- replace(names(dat_lodger), names(dat_lodger) == "W8", "Weight")

# names(dat_lodger)
# codebook <- lapply(dat_lodger, attr, "label")

## assemble the wide conjoint table (one row per row of dat_lodger)

## features
# every column whose name contains "_seen<digit>"; drop = FALSE keeps a
# data frame even if only a single column matches
features <- dat_lodger[, grepl("_seen[[:digit:]]{1}", names(dat_lodger)), drop = FALSE]
# shorten the profile markers: "_seen1_t" becomes "A", "_seen2_t" becomes "B"
names(features) <- sub("_seen1_t", "A", names(features))
names(features) <- sub("_seen2_t", "B", names(features))

## demographics
# all columns up to and including partyid_2017, plus rl2, rl3 and rl4
last_demographic <- which(names(dat_lodger) == "partyid_2017")
# seq_len() needs a single position; fail loudly if the column is missing
# or duplicated
stopifnot(length(last_demographic) == 1L)
demographics <- dat_lodger[, c(names(dat_lodger)[seq_len(last_demographic)], "rl2", "rl3", "rl4"), drop = FALSE]

## outcome is which name was chosen
outcomes <- dat_lodger[, grep("QPAIR[[:digit:]]{1}$", names(dat_lodger), value = TRUE), drop = FALSE]
# the outcome1..outcome5 names are only valid for exactly five columns;
# without this check extra matches would silently get NA names
stopifnot(ncol(outcomes) == 5L)
names(outcomes) <- paste0("outcome", 1:5)

## timing
timings <- dat_lodger[, grepl("page_Intro[[:digit:]]{1}_timing", names(dat_lodger)), drop = FALSE]
# same guard as for the outcomes: timing1..timing5 need five columns
stopifnot(ncol(timings) == 5L)
names(timings) <- paste0("timing", 1:5)

# column order: features, outcomes, timing, demographics
conj <- cbind(features, outcomes, timings, demographics)
stopifnot(nrow(conj) == nrow(dat_lodger))

# drop the intermediate pieces so only conj and dat_lodger remain
rm(features, demographics, outcomes, timings, last_demographic)

# stack profiles
# wide -> long: each group of wide columns (one group per A/B attribute,
# plus timing and outcome) collapses into a single column, with "pair"
# indexing the position within the group and "ID" identifying the wide row.
# Columns starting with "politicsA"/"politicsB" are stored as partyA/partyB.
long_by_pair <- reshape(
    conj,
    direction = "long",
    idvar = "ID",
    timevar = "pair",
    # one character vector of wide column names per prefix, in the same
    # order as v.names below
    varying = lapply(
        c(
            "^nameA", "^nameB",
            "^ageA", "^ageB",
            "^volunteerA", "^volunteerB",
            "^hobbyA", "^hobbyB",
            "^politicsA", "^politicsB",
            "^euA", "^euB",
            "^occupationA", "^occupationB",
            "^timing", "^outcome"
        ),
        function(prefix) grep(prefix, names(conj), value = TRUE)
    ),
    v.names = c(
        "nameA", "nameB",
        "ageA", "ageB",
        "volunteerA", "volunteerB",
        "hobbyA", "hobbyB",
        "partyA", "partyB",
        "euA", "euB",
        "occupationA", "occupationB",
        "timing", "outcome"
    )
)
# five stacked rows are expected for every wide row
stopifnot(nrow(long_by_pair) == 5 * nrow(conj))
conj <- long_by_pair
rm(long_by_pair)

# create conjoint profile display pair variable
# key of the form "<ID>_<pair>", used as idvar when the A/B options are stacked
conj[["id_pair"]] <- paste0(conj[["ID"]], "_", conj[["pair"]])

# recode outcome so it is a 0/1 indicator of whether profile was chosen (rather than indicator of name chosen)
# outcomeA compares the chosen name with nameA, outcomeB with nameB
# NOTE(review): a non-missing outcome that equals neither displayed name
# gives 0 for both profiles rather than NA -- confirm this is intended
conj[c("outcomeA", "outcomeB")] <- lapply(
    conj[c("nameA", "nameB")],
    function(shown_name) as.integer(conj[["outcome"]] == shown_name)
)
# the name-based outcome is no longer needed
conj[["outcome"]] <- NULL

# stack a/b options
# long -> longer: every <attribute>A / <attribute>B column pair collapses
# into one <attribute> column; "AB" is 1 for the A column and 2 for the B one
stacked_attrs <- c("name", "age", "volunteer", "hobby", "party", "eu", "occupation", "outcome")
long_by_profile <- reshape(
    conj,
    direction = "long",
    idvar = "id_pair",
    timevar = "AB",
    # c("nameA", "nameB"), c("ageA", "ageB"), ... in the order of stacked_attrs
    varying = lapply(stacked_attrs, paste0, c("A", "B")),
    v.names = stacked_attrs
)
# two stacked rows are expected for every input row
stopifnot(nrow(long_by_profile) == 2 * nrow(conj))
conj <- long_by_profile
rm(long_by_profile, stacked_attrs)

# convert feature variables to labeled factors
# name, age, volunteer, hobby and occupation go through rio::factorize();
# the levels "Skipped" and "Not Asked" are then turned into missing values
labelled_features <- c("name", "age", "volunteer", "hobby", "occupation")
conj[labelled_features] <- lapply(
    conj[labelled_features],
    function(values) {
        values <- rio::factorize(values)
        levels(values)[levels(values) %in% c("Skipped", "Not Asked")] <- NA_character_
        values
    }
)
rm(labelled_features)

# party and eu are built from their numeric codes with explicit labels; the
# first listed level (code 3 in both) becomes the first factor level, and any
# code outside 1-3 becomes NA
conj[["party"]] <- factor(
    conj[["party"]],
    levels = c(3, 2, 1),
    labels = c(
        "Didn't support a party at the 2017 election",
        "Supported the Labour Party at the 2017 election",
        "Supported the Conservative Party at the 2017 election"
    )
)
conj[["eu"]] <- factor(
    conj[["eu"]],
    levels = c(3, 1, 2),
    labels = c(
        "Didn't support a side in the EU referendum",
        "Supported the Remain campaign in the EU referendum",
        "Supported the Leave campaign in the EU referendum"
    )
)

# Identity variables
# id_party: the partyid_2017 label, except that rows labelled "Don't know" or
# "No – None" take the rl2 label instead
conj[["id_party"]] <- local({
    party_label <- rio::characterize(conj[["partyid_2017"]])
    ifelse(
        party_label %in% c("Don't know", "No – None"),
        rio::characterize(conj[["rl2"]]),
        party_label
    )
})
# id_brexit: the rl3 label
conj[["id_brexit"]] <- rio::characterize(conj[["rl3"]])

# export
# write the stacked data (one row per displayed profile) to an .rds file
rio::export(conj, file = "conjoint-lodger-stacked.rds")

# cleanup
# remove the working objects from the workspace
rm(conj, dat_lodger)
